merged_df <- read.csv("./all_years/all_yrs.csv")
norm_df <- read.csv("./all_years/all_yrs_norm.csv")proj_eda Normalized Data
Perform EDA (Exploratory Data Analysis) using some Data Visualizations.
| UNITID | school_name.x | coach_comp_third_2018 | support_comp_third_2018 | coach_comp_2018 | guarantees_2018 | total_ops_2018 | aid_2018 | support_comp_2018 | severance_2018 | recruiting_2018 | teamtravel_2018 | equip_2018 | game_expenses_2018 | marketing_2018 | camps_2018 | spirit_2018 | facilities_rentals_2018 | overhead_2018 | indirect_institutional_2018 | medical_2018 | memberships_2018 | meals_2018 | otherops_2018 | bowl_expense_2018 | prof_def_2018 | bowl_coachcomp_2018 | total_expendature.x | discounts_to_tuition.x | gifts.x | reseach_expendatures.x | graduation_rate.x | school_name.y | coach_comp_third_2019 | support_comp_third_2019 | coach_comp_2019 | guarantees_2019 | total_ops_2019 | aid_2019 | support_comp_2019 | severance_2019 | recruiting_2019 | teamtravel_2019 | equip_2019 | game_expenses_2019 | marketing_2019 | camps_2019 | spirit_2019 | facilities_rentals_2019 | overhead_2019 | indirect_institutional_2019 | medical_2019 | memberships_2019 | meals_2019 | otherops_2019 | bowl_expense_2019 | prof_def_2019 | bowl_coachcomp_2019 | total_expendature.y | discounts_to_tuition.y | gifts.y | reseach_expendatures.y | graduation_rate.y | school_name.x.x | coach_comp_third_2020 | support_comp_third_2020 | coach_comp_2020 | guarantees_2020 | total_ops_2020 | aid_2020 | support_comp_2020 | severance_2020 | recruiting_2020 | teamtravel_2020 | equip_2020 | game_expenses_2020 | marketing_2020 | camps_2020 | spirit_2020 | facilities_rentals_2020 | overhead_2020 | indirect_institutional_2020 | medical_2020 | memberships_2020 | meals_2020 | otherops_2020 | bowl_expense_2020 | prof_def_2020 | bowl_coachcomp_2020 | total_expendature.x.x | discounts_to_tuition.x.x | gifts.x.x | reseach_expendatures.x.x | graduation_rate.x.x | school_name.y.y | coach_comp_third_2021 | support_comp_third_2021 | coach_comp_2021 | guarantees_2021 | total_ops_2021 | aid_2021 | support_comp_2021 | severance_2021 | recruiting_2021 | teamtravel_2021 | equip_2021 | game_expenses_2021 | marketing_2021 | camps_2021 | spirit_2021 | facilities_rentals_2021 | overhead_2021 | indirect_institutional_2021 | medical_2021 | memberships_2021 | meals_2021 | otherops_2021 | bowl_expense_2021 | prof_def_2021 | bowl_coachcomp_2021 | total_expendature.y.y | discounts_to_tuition.y.y | gifts.y.y | reseach_expendatures.y.y | graduation_rate.y.y | school_name | coach_comp_third_2022 | support_comp_third_2022 | coach_comp_2022 | guarantees_2022 | total_ops_2022 | aid_2022 | support_comp_2022 | severance_2022 | recruiting_2022 | teamtravel_2022 | equip_2022 | game_expenses_2022 | marketing_2022 | camps_2022 | spirit_2022 | facilities_rentals_2022 | overhead_2022 | indirect_institutional_2022 | medical_2022 | memberships_2022 | meals_2022 | otherops_2022 | bowl_expense_2022 | prof_def_2022 | bowl_coachcomp_2022 | total_expendature | discounts_to_tuition | gifts | reseach_expendatures | graduation_rate |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 199139 | University of North Carolina at Charlotte | 0 | 0 | 37.6084 | 2.9891 | 242.5071 | 52.6928 | 30.7106 | 0.7800 | 4.3619 | 18.8738 | 8.0816 | 10.2208 | 6.0467 | 0.0000 | 1.0905 | 16.1160 | 18.4733 | 18.3615 | 2.3835 | 2.6468 | 2.3096 | 8.7605 | 0.0000 | 11.7992 | 0.000 | 496.8135 | 350.5052 | 79.1712 | 205.3767 | 0.1894 | University of North Carolina at Charlotte | 0 | 0 | 46.0872 | 2.8997 | 271.6001 | 56.0832 | 38.5333 | 2.5983 | 5.3454 | 19.2602 | 8.6700 | 10.8250 | 6.0144 | 0.0000 | 1.2671 | 16.9183 | 18.7239 | 22.9167 | 2.6851 | 2.7974 | 2.3938 | 7.5812 | 0.0000 | 17.0372 | 0.0000 | 560.2375 | 408.7012 | 140.7289 | 223.2607 | 0.1950 | University of North Carolina at Charlotte | 0 | 0 | 52.2119 | 4.9678 | 288.2604 | 54.6535 | 38.5716 | 2.9587 | 5.1489 | 19.0337 | 11.7427 | 10.2088 | 5.0488 | 0.0000 | 0.9782 | 17.7532 | 19.1325 | 24.7843 | 2.5789 | 3.0881 | 2.1140 | 10.3439 | 2.2147 | 12.6100 | 0.7262 | 589.1308 | 426.4901 | 91.8382 | 238.1076 | 0.2008 | University of North Carolina at Charlotte | 0 | 0 | 49.3748 | 0.7440 | 276.3402 | 59.5972 | 36.1769 | 0.7783 | 1.0136 | 13.7193 | 12.9504 | 4.8375 | 2.9154 | 0.0000 | 0.6045 | 17.6107 | 18.0663 | 21.0953 | 21.9792 | 2.5678 | 3.0147 | 9.2944 | 0 | NA | 0 | 552.6804 | 438.2813 | 84.8773 | 290.3364 | 0.2160 | University of North Carolina at Charlotte | 0 | 0 | 55.9542 | 4.6309 | 299.9096 | 52.9917 | 44.0449 | 0.0488 | 5.5407 | 26.4480 | 10.7449 | 12.2512 | 4.5430 | 0.0000 | 1.6601 | 18.2134 | 18.9926 | 25.7351 | 2.7753 | 3.0140 | 4.7238 | 7.5973 | 0.0000 | 14.3229 | 0.0000 | 614.1422 | 438.1081 | 98.0369 | 215.8536 | 0.2338 |
| 104151 | Arizona State University Campus Immersion | 12500 | 0 | 45.8938 | 3.9856 | 286.5229 | 35.5016 | 43.5892 | 29.0665 | 3.4887 | 13.0890 | 14.3161 | 9.4625 | 11.1612 | 0.0491 | 1.2376 | 34.0412 | 20.4243 | 4.3900 | 3.6847 | 0.0983 | 4.1739 | 4.8612 | 2.8082 | -29.7086 | 1.172 | 543.3373 | 845.4414 | 171.2845 | 752.6091 | 0.0920 | Arizona State University Campus Immersion | 0 | 0 | 44.2249 | 3.2532 | 266.4233 | 37.2985 | 46.2299 | 0.2714 | 3.6387 | 13.7295 | 13.1854 | 9.2089 | 11.7766 | 0.0000 | 1.8137 | 25.1888 | 37.0526 | 4.1219 | 3.4570 | 0.1317 | 3.9604 | 4.4047 | 2.8763 | 7.4129 | 0.5994 | 540.2595 | 905.7922 | 186.1542 | 812.3945 | 0.0999 | Arizona State University Campus Immersion | 5000 | 0 | 46.2556 | 6.0617 | 257.3785 | 36.7406 | 47.1595 | 2.8717 | 2.9295 | 12.0202 | 12.4221 | 9.5262 | 9.6062 | 0.0000 | 1.3195 | 25.9147 | 24.3477 | 3.8565 | 4.0494 | 0.1214 | 3.6645 | 4.8243 | 2.8417 | NA | 0.8347 | 514.7570 | 1010.0408 | 167.6446 | 852.7580 | 0.1467 | Arizona State University Campus Immersion | 5000 | 0 | 34.3570 | 0.3197 | 159.5013 | 26.6688 | 32.9522 | 0.7705 | 0.5848 | 5.9418 | 7.6118 | 3.6735 | 1.8585 | 0.0000 | 0.3620 | 16.7592 | 16.5540 | 2.7123 | 3.6223 | 0.0537 | 2.4901 | 2.2013 | 0 | 21.3690 | 0 | 340.3716 | 824.0440 | 117.3291 | 637.9806 | 0.1099 | Arizona State University Campus Immersion | 5000 | 0 | 36.7206 | 2.5096 | 185.2749 | 25.5507 | 34.8055 | 0.5418 | 1.8960 | 9.0210 | 8.0303 | 9.7276 | 6.0060 | 0.0000 | 0.6536 | 18.1995 | 14.4115 | 3.4443 | 3.5373 | 0.0924 | 3.2742 | 3.7599 | 2.7571 | NA | 0.3286 | 370.5498 | 876.2341 | 114.4475 | 683.2726 | 0.1079 |
| 230728 | Utah State University | 643515 | 0 | 28.9837 | 2.5594 | 164.6238 | 25.9367 | 19.6846 | 1.5067 | 2.4971 | 12.6717 | 5.2653 | 3.9487 | 2.5891 | 0.0000 | 0.0864 | 24.3834 | 3.6559 | 8.2367 | 1.7304 | 2.1896 | 2.8463 | 9.4408 | 2.8851 | -1.5281 | 0.815 | 327.7195 | 361.2401 | 80.7451 | 756.1246 | 0.0756 | Utah State University | 592917 | 0 | 33.3670 | 3.6698 | 170.5178 | 26.7240 | 18.5727 | 1.1157 | 2.3268 | 12.6188 | 5.1281 | 3.5270 | 2.4647 | 0.0000 | 0.0697 | 26.1098 | 3.2986 | 9.4312 | 2.1649 | 2.1993 | 2.8253 | 9.1589 | 2.4587 | 3.1749 | 0.8087 | 344.2105 | 384.3851 | 104.3401 | 967.1165 | 0.0827 | Utah State University | 681250 | 0 | 34.9412 | 2.9694 | 170.7592 | 27.2176 | 19.6868 | 1.6056 | 2.0146 | 13.2278 | 6.6037 | 3.6020 | 2.0564 | 0.0000 | 0.0680 | 19.4534 | 3.7326 | 10.5997 | 3.6285 | 2.3302 | 2.5995 | 7.6358 | 2.9544 | NA | 0.9381 | 341.5185 | 374.7711 | 91.4608 | 1040.0575 | 0.0860 | Utah State University | 468750 | 0 | 37.2324 | 0.2066 | 153.1890 | 28.8647 | 20.6053 | 2.5210 | 0.4883 | 7.7984 | 5.0243 | 2.1563 | 1.0618 | 0.0000 | 0.0000 | 17.7481 | 2.6906 | 8.8183 | 3.3512 | 1.8470 | 2.1295 | 8.6532 | 0 | 3.3751 | 0 | 309.7531 | 388.1709 | 123.3266 | 1085.3170 | 0.1033 | Utah State University | 615887 | 0 | 41.7977 | 3.1118 | 184.7208 | 30.2111 | 23.0583 | 0.0000 | 3.3176 | 15.3174 | 6.5673 | 5.0228 | 1.9147 | 0.0000 | 0.0690 | 15.5237 | 3.3494 | 8.0400 | 3.3684 | 2.4717 | 3.2543 | 10.9671 | 3.5655 | 1.4392 | 1.1364 | 370.8807 | 406.6803 | 184.8010 | 1155.7511 | 0.1001 |
| 110635 | University of California-Berkeley | 0 | 0 | 94.7291 | 9.4670 | 559.9209 | 65.9807 | 104.0745 | 7.1333 | 7.0773 | 32.1560 | 31.1610 | 23.1575 | 19.8679 | 0.0000 | 0.7952 | 104.8157 | 25.7552 | 0.0000 | 11.9915 | 0.4625 | 7.8440 | 13.4526 | 0.0000 | -97.8007 | 0.000 | 1022.0412 | 1005.3896 | 1599.6485 | 3423.4898 | 0.1053 | University of California-Berkeley | 0 | 0 | 104.6133 | 6.7396 | 516.7996 | 58.1813 | 106.5069 | 4.0517 | 8.5717 | 33.9408 | 21.8693 | 25.3109 | 23.2661 | 0.0000 | 0.7928 | 53.8087 | 26.6216 | 0.0000 | 12.8684 | 0.1276 | 4.9214 | 20.2082 | 3.4305 | NA | 0.9689 | 1033.5991 | 991.9017 | 1544.7587 | 3211.0850 | 0.1108 | University of California-Berkeley | 0 | 0 | 99.2056 | 5.2347 | 479.8975 | 51.3990 | 111.9221 | 5.2564 | 6.7153 | 26.1962 | 25.0012 | 24.0883 | 20.9564 | 0.0000 | 1.0849 | 43.8998 | 22.4038 | 0.0000 | 11.3223 | 0.2656 | 9.3301 | 13.5040 | 1.4203 | 15.1521 | 0.6915 | 974.9471 | 1013.1897 | 1540.0773 | 3263.4672 | 0.1010 | University of California-Berkeley | 0 | 0 | 109.8929 | 1.0012 | 442.7973 | 54.0061 | 112.7593 | 4.8814 | 1.8238 | 20.7048 | 23.3729 | 6.5183 | 9.4563 | 0.0000 | 0.0099 | 44.2207 | 15.3756 | 0.0000 | 13.6222 | 0.4078 | 6.7051 | 18.0392 | NA | 17.2954 | 0 | 902.8900 | 1198.0979 | 274.4681 | 3505.5062 | 0.1061 | University of California-Berkeley | 0 | 0 | 114.8500 | 8.5324 | 545.4876 | 62.5541 | 122.4546 | 2.6376 | 9.8075 | 38.6766 | 27.2219 | 23.7132 | 20.2260 | 0.0000 | 0.5349 | 45.0169 | 25.2711 | 0.0000 | 15.0387 | 0.5890 | 10.9084 | 17.4548 | 0.0000 | 17.7547 | 0.0000 | 1108.7300 | 1403.8958 | 1866.4224 | 3684.6140 | 0.0930 |
| 203517 | Kent State University at Kent | 0 | 0 | 31.2298 | 2.7567 | 144.0878 | 30.6634 | 23.9207 | 0.0000 | 2.7993 | 14.2172 | 8.5730 | 3.1329 | 5.2735 | 2.1600 | 0.6536 | 1.1767 | 3.2656 | 0.0000 | 0.7915 | 1.4012 | 0.4449 | 11.6278 | 0.0000 | -5.7816 | 0.000 | 282.3940 | 344.6302 | 39.3381 | 79.6657 | 0.0368 | Kent State University at Kent | 0 | 0 | 29.7679 | 2.2981 | 143.6486 | 31.3794 | 25.1836 | 0.0000 | 3.3160 | 15.6979 | 10.1725 | 3.7700 | 5.0160 | 2.4211 | 0.9401 | 0.0000 | 3.4241 | 0.0000 | 1.2140 | 0.2437 | 0.5339 | 8.2701 | 0.0000 | NA | 0.0000 | 287.2973 | 374.1530 | 48.4714 | 102.4385 | 0.0452 | Kent State University at Kent | 0 | 0 | 30.3060 | 1.9892 | 142.8182 | 33.1480 | 27.1937 | 0.0000 | 2.4610 | 14.2244 | 8.5900 | 3.2317 | 4.2812 | 1.7596 | 0.4722 | 1.7581 | 3.5765 | 0.0000 | 2.0972 | 1.5248 | 0.4159 | 3.6640 | 2.1247 | NA | 0.0000 | 285.6365 | 384.9414 | 5.5103 | 119.6426 | 0.0554 | Kent State University at Kent | 0 | 0 | 31.8172 | 0.2071 | 105.3177 | 30.1091 | 20.4221 | 0.0000 | 0.4211 | 5.8407 | 5.0451 | 1.3099 | 0.6934 | 0.0603 | 0.3179 | 1.7155 | 0.0000 | 0.0000 | 1.6573 | 1.0945 | 0.0000 | 4.6066 | 0 | NA | 0 | 210.6355 | 371.1401 | 22.2887 | 107.2095 | 0.0572 | Kent State University at Kent | 0 | 0 | 32.3973 | 2.2728 | 148.6267 | 38.3760 | 26.1164 | 0.0000 | 2.9433 | 17.0268 | 7.2225 | 3.0836 | 3.2131 | 1.6284 | 0.2030 | 0.0000 | 3.6370 | 0.0000 | 2.2904 | 2.5105 | 0.7273 | 3.2513 | 1.7271 | NA | 0.0000 | 297.2534 | 479.6363 | 37.1558 | 127.0126 | 0.0623 |
# Gather data into long format for easier plotting
long_data <- norm_df %>%
select(UNITID, total_ops_2018, total_ops_2019, total_ops_2020, total_ops_2021, total_ops_2022) %>%
pivot_longer(cols = starts_with("total_ops"),
names_to = "year",
values_to = "total_ops") %>%
mutate(year = gsub("total_ops_", "", year))
# Summarize total athletic spending by year
summary_data <- long_data %>%
group_by(year) %>%
summarize(mean_total_ops = mean(total_ops, na.rm = TRUE))
# Create barplot with revised units
ggplot(data = summary_data, aes(x = year, y = mean_total_ops, fill = year)) +
geom_bar(stat = "identity", alpha = 0.8, color = "black") +
labs(title = "Average Per-Student Athletic Spending Across Years (2018–2022)",
x = "Year",
y = "Mean Athletic Spending Per Student (USD)",
fill = "Year") +
theme_minimal() +
scale_fill_brewer(palette = "Set3")The plot shows the mean total athletic spending from 2018 to 2022, highlighting a decline in 2021, possibly due to disruptions like the COVID-19 pandemic. Spending increased again in 2022, surpassing previous years, indicating a recovery or increased investment in athletics.
Make a Means dataframe for means across 5 years of the variables
# Create a dataframe for calculating means for the specified columns
means_df <- norm_df %>%
rowwise() %>%
mutate(
coach_comp_third = mean(c(coach_comp_third_2018, coach_comp_third_2019, coach_comp_third_2020, coach_comp_third_2021, coach_comp_third_2022), na.rm = TRUE),
support_comp_third = mean(c(support_comp_third_2018, support_comp_third_2019, support_comp_third_2020, support_comp_third_2021, support_comp_third_2022), na.rm = TRUE),
coach_comp = mean(c(coach_comp_2018, coach_comp_2019, coach_comp_2020, coach_comp_2021, coach_comp_2022), na.rm = TRUE),
guarantees = mean(c(guarantees_2018, guarantees_2019, guarantees_2020, guarantees_2021, guarantees_2022), na.rm = TRUE),
total_ops = mean(c(total_ops_2018, total_ops_2019, total_ops_2020, total_ops_2021, total_ops_2022), na.rm = TRUE),
aid = mean(c(aid_2018, aid_2019, aid_2020, aid_2021, aid_2022), na.rm = TRUE),
support_comp = mean(c(support_comp_2018, support_comp_2019, support_comp_2020, support_comp_2021, support_comp_2022), na.rm = TRUE),
severance = mean(c(severance_2018, severance_2019, severance_2020, severance_2021, severance_2022), na.rm = TRUE),
recruiting = mean(c(recruiting_2018, recruiting_2019, recruiting_2020, recruiting_2021, recruiting_2022), na.rm = TRUE),
team_travel = mean(c(teamtravel_2018, teamtravel_2019, teamtravel_2020, teamtravel_2021, teamtravel_2022), na.rm = TRUE),
equip = mean(c(equip_2018, equip_2019, equip_2020, equip_2021, equip_2022), na.rm = TRUE),
game_expenses = mean(c(game_expenses_2018, game_expenses_2019, game_expenses_2020, game_expenses_2021, game_expenses_2022), na.rm = TRUE),
marketing = mean(c(marketing_2018, marketing_2019, marketing_2020, marketing_2021, marketing_2022), na.rm = TRUE),
camps = mean(c(camps_2018, camps_2019, camps_2020, camps_2021, camps_2022), na.rm = TRUE),
spirits = mean(c(spirit_2018, spirit_2019, spirit_2020, spirit_2021, spirit_2022), na.rm = TRUE),
facilities = mean(c(facilities_rentals_2018, facilities_rentals_2019, facilities_rentals_2020, facilities_rentals_2021, facilities_rentals_2022), na.rm = TRUE),
overhead = mean(c(overhead_2018, overhead_2019, overhead_2020, overhead_2021, overhead_2022), na.rm = TRUE),
indirect_institutional = mean(c(indirect_institutional_2018, indirect_institutional_2019, indirect_institutional_2020, indirect_institutional_2021, indirect_institutional_2022), na.rm = TRUE),
medical = mean(c(medical_2018, medical_2019, medical_2020, medical_2021, medical_2022), na.rm = TRUE),
membership = mean(c(memberships_2018, memberships_2019, memberships_2020, memberships_2021, memberships_2022), na.rm = TRUE),
meals = mean(c(meals_2018, meals_2019, meals_2020, meals_2021, meals_2022), na.rm = TRUE),
otherops = mean(c(otherops_2018, otherops_2019, otherops_2020, otherops_2021, otherops_2022), na.rm = TRUE),
bowl_expenses = mean(c(bowl_expense_2018, bowl_expense_2019, bowl_expense_2020, bowl_expense_2021, bowl_expense_2022), na.rm = TRUE),
prof_def = mean(c(prof_def_2018, prof_def_2019, prof_def_2020, prof_def_2021, prof_def_2022), na.rm = TRUE),
bowl_coachcomp = mean(c(bowl_coachcomp_2018, bowl_coachcomp_2019, bowl_coachcomp_2020, bowl_coachcomp_2021, bowl_coachcomp_2022), na.rm = TRUE),
total_expenditure = mean(c(total_expendature.x, total_expendature.y, total_expendature.x.x, total_expendature.y.y), na.rm = TRUE),
discounts_to_tuition = mean(c(discounts_to_tuition.x, discounts_to_tuition.y, discounts_to_tuition.x.x, discounts_to_tuition.y.y), na.rm = TRUE),
gifts = mean(c(gifts.x, gifts.y, gifts.x.x, gifts.y.y), na.rm = TRUE),
research_expenditure = mean(c(reseach_expendatures.x, reseach_expendatures.y, reseach_expendatures.x.x, reseach_expendatures.y.y), na.rm = TRUE),
graduation_rate = mean(c(graduation_rate.x, graduation_rate.y, graduation_rate.x.x, graduation_rate.y.y), na.rm = TRUE)
) %>%
ungroup() %>%
select(UNITID, school_name.x, coach_comp_third, support_comp_third, coach_comp, guarantees, total_ops, aid,
support_comp, severance, recruiting, team_travel, equip, game_expenses, marketing, camps, spirits, facilities,
overhead, indirect_institutional, medical, membership, meals, otherops, bowl_expenses, prof_def, bowl_coachcomp,
total_expenditure, discounts_to_tuition, gifts, research_expenditure, graduation_rate)colnames(means_df)[colnames(means_df) == "school_name.x"] <- "school_name"
sample_n(means_df, 5)| UNITID | school_name | coach_comp_third | support_comp_third | coach_comp | guarantees | total_ops | aid | support_comp | severance | recruiting | team_travel | equip | game_expenses | marketing | camps | spirits | facilities | overhead | indirect_institutional | medical | membership | meals | otherops | bowl_expenses | prof_def | bowl_coachcomp | total_expenditure | discounts_to_tuition | gifts | research_expenditure | graduation_rate |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 237525 | Marshall University | 0 | 0.00000 | 26.73354 | 3.51122 | 152.38260 | 34.40386 | 22.08134 | 0.80580 | 2.80142 | 15.34270 | 5.16592 | 4.77868 | 3.45652 | 0.45678 | 0.64628 | 4.52558 | 5.54316 | 4.69456 | 5.52618 | 1.73744 | 0.18344 | 6.65998 | 2.60452 | -2.505900 | 0.72370 | 285.1008 | 186.8722 | 5.912425 | 78.97922 | 0.190750 |
| 240727 | University of Wyoming | 143840 | 0.10232 | 25.07428 | 2.84374 | 126.50644 | 20.49662 | 25.50908 | 0.12744 | 2.06770 | 8.46388 | 3.08884 | 3.22246 | 14.57676 | 0.00000 | 0.23294 | 0.93744 | 3.97162 | 1.48634 | 3.09566 | 1.46486 | 3.31734 | 4.35856 | 1.27412 | 6.851033 | 0.37874 | 256.3442 | 117.1760 | 84.771600 | 280.57642 | 0.125425 |
| 147703 | Northern Illinois University | 0 | 0.00000 | 14.10728 | 1.25626 | 72.19782 | 21.71176 | 8.94982 | 0.02150 | 0.76152 | 6.22568 | 2.59596 | 2.00182 | 1.78804 | 0.32068 | 0.16468 | 1.86960 | 2.05304 | 1.48158 | 1.52224 | 0.59768 | 0.47790 | 2.98002 | 1.15786 | 1.889800 | 0.15284 | 145.3225 | 174.9625 | 0.000000 | 66.29958 | 0.065225 |
| 214777 | Pennsylvania State University-Main Campus | 0 | 0.00000 | 282.41374 | 22.03748 | 1381.10088 | 190.33044 | 251.90858 | 4.59506 | 21.67246 | 65.59184 | 32.79056 | 78.15078 | 13.18416 | 0.00000 | 2.93644 | 147.07454 | 103.22010 | 0.00000 | 17.09872 | 0.84410 | 16.72766 | 107.18032 | 19.49210 | 71.272875 | 3.85174 | 2683.2919 | NaN | NaN | NaN | 0.174325 |
| 201885 | University of Cincinnati-Main Campus | 0 | 0.00000 | 61.74038 | 5.19000 | 325.88182 | 47.24372 | 38.85760 | 1.05278 | 3.76404 | 19.82102 | 3.08242 | 9.23016 | 15.37966 | 0.00000 | 1.09138 | 59.35060 | 6.50934 | 10.20676 | 2.79560 | 0.50214 | 3.52860 | 28.41996 | 6.39024 | 17.070175 | 1.72552 | 641.8050 | 497.5329 | 286.496475 | 956.01500 | 0.111725 |
# Select relevant columns for analysis
two_groups_df <- means_df %>%
select(total_ops, discounts_to_tuition, gifts, graduation_rate, research_expenditure)
# Scale 'total_ops' into spending groups (Low, High)
two_groups_df <- two_groups_df %>%
mutate(total_ops_group = cut(
total_ops,
breaks = quantile(total_ops, probs = seq(0, 1, 0.5), na.rm = TRUE),
labels = c("Low", "High"),
include.lowest = TRUE
))
two_groups_df <- two_groups_df %>% filter(!is.na(total_ops_group))
# Select relevant columns for analysis
three_groups_df <- means_df %>%
select(total_ops, discounts_to_tuition, gifts, graduation_rate, research_expenditure)
# Scale 'total_ops' into spending groups (Low, Medium, High)
three_groups_df <- three_groups_df %>%
mutate(total_ops_group = cut(
total_ops,
breaks = quantile(total_ops, probs = seq(0, 1, 0.33), na.rm = TRUE),
labels = c("Low", "Medium", "High"),
include.lowest = TRUE
))
three_groups_df <- three_groups_df %>% filter(!is.na(total_ops_group))dependent_vars <- c("discounts_to_tuition", "gifts", "graduation_rate", "research_expenditure")
independent_var <- "total_ops_group"
two_groups_subset <- two_groups_df[, c(dependent_vars, independent_var)]
two_groups_subset <- na.omit(two_groups_subset)
three_groups_subset <- three_groups_df[, c(dependent_vars, independent_var)]
three_groups_subset <- na.omit(three_groups_subset)head(three_groups_df, 5)| total_ops | discounts_to_tuition | gifts | graduation_rate | research_expenditure | total_ops_group |
|---|---|---|---|---|---|
| 235.10310 | 518.9550 | 270.18552 | 0.391975 | 2108.97807 | Medium |
| 539.96740 | 822.0061 | 148.70020 | 0.129600 | 278.65440 | High |
| 791.66188 | 706.2327 | 257.76210 | 0.055350 | 1227.54595 | High |
| 85.87798 | 101.6097 | 42.18203 | 0.213800 | 99.05178 | Low |
| 231.02018 | 896.3296 | 160.60310 | 0.112125 | 763.93555 | Medium |
# Calculate group means
group_means <- three_groups_df %>%
group_by(total_ops_group) %>%
summarize(across(c(discounts_to_tuition, gifts, graduation_rate, research_expenditure), mean, na.rm = TRUE))Warning: There was 1 warning in `summarize()`.
ℹ In argument: `across(...)`.
ℹ In group 1: `total_ops_group = Low`.
Caused by warning:
! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
Supply arguments directly to `.fns` through an anonymous function instead.
# Previously
across(a:b, mean, na.rm = TRUE)
# Now
across(a:b, \(x) mean(x, na.rm = TRUE))
# Barplot for means
ggplot(group_means, aes(x = total_ops_group, y = discounts_to_tuition, fill = total_ops_group)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Mean Discounts to Tuition by Athletic Spending Group", x = "Spending Group", y = "Mean Discounts to Tuition Per Student (USD)") +
theme_minimal()ggplot(group_means, aes(x = total_ops_group, y = gifts, fill = total_ops_group)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Mean Gifts by Athletic Spending Group", x = "Spending Group", y = "Mean Gifts Per Student (USD)") +
theme_minimal()ggplot(group_means, aes(x = total_ops_group, y = research_expenditure, fill = total_ops_group)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Mean Research Expenditure by Athletic Spending Group", x = "Spending Group", y = "Mean Research Expenditure Per Student USD") +
theme_minimal()ggplot(group_means, aes(x = total_ops_group, y = graduation_rate, fill = total_ops_group)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Mean Graduation Rate by Athletic Spending Group", x = "Spending Group", y = "Mean Graduation Rate") +
theme_minimal()# Select relevant columns for analysis
three_groups_df_1 <- means_df %>%
select(coach_comp, discounts_to_tuition, gifts, graduation_rate, research_expenditure)
# Scale 'coach_comp' into spending groups (Low, Medium, High)
three_groups_df_1 <- three_groups_df_1 %>%
mutate(coach_comp_group = cut(
coach_comp,
breaks = quantile(coach_comp, probs = seq(0, 1, 0.33), na.rm = TRUE),
labels = c("Low", "Medium", "High"),
include.lowest = TRUE
))
# Filter out rows with missing coach_comp_group
three_groups_df_1 <- three_groups_df_1 %>% filter(!is.na(coach_comp_group))# Calculate group means
group_means_1 <- three_groups_df_1 %>%
group_by(coach_comp_group) %>%
summarize(across(c(discounts_to_tuition, gifts, graduation_rate, research_expenditure), mean, na.rm = TRUE))# Barplot for means
ggplot(group_means_1, aes(x = coach_comp_group, y = discounts_to_tuition, fill = coach_comp_group)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Mean Discounts to Tuition by Coach Compensation Group", x = "Coach Compensation Group", y = "Mean Discounts to Tuition Per Student (USD)") +
theme_minimal()# Barplot for means
ggplot(group_means_1, aes(x = coach_comp_group, y = gifts, fill = coach_comp_group)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Mean Gifts by Coach Compensation Group", x = "Coach Compensation Group", y = "Mean Gifts Per Student (USD)") +
theme_minimal()# Barplot for means
ggplot(group_means_1, aes(x = coach_comp_group, y = research_expenditure, fill = coach_comp_group)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Mean Research Expenditure by Coach Compensation Group", x = "Coach Compensation Group", y = "Mean Research Expenditure Per Student (USD)") +
theme_minimal()# Barplot for means
ggplot(group_means_1, aes(x = coach_comp_group, y = graduation_rate, fill = coach_comp_group)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Mean Graduation Numbers by Coach Compensation Group", x = "Coach Compensation Group", y = "Mean Graduation Numbers") +
theme_minimal()# Select relevant columns for analysis
three_groups_df_2 <- means_df %>%
select(support_comp, discounts_to_tuition, gifts, graduation_rate, research_expenditure)
# Scale 'support_comp' into spending groups (Low, Medium, High)
three_groups_df_2 <- three_groups_df_2 %>%
mutate(support_comp_group = cut(
support_comp,
breaks = quantile(support_comp, probs = seq(0, 1, 0.33), na.rm = TRUE),
labels = c("Low", "Medium", "High"),
include.lowest = TRUE
))
# Filter out rows with missing coach_comp_group
three_groups_df_2 <- three_groups_df_2 %>% filter(!is.na(support_comp_group))head(three_groups_df_2)| support_comp | discounts_to_tuition | gifts | graduation_rate | research_expenditure | support_comp_group |
|---|---|---|---|---|---|
| 37.36655 | 518.9550 | 270.18552 | 0.391975 | 2108.97807 | Medium |
| 96.57464 | 822.0061 | 148.70020 | 0.129600 | 278.65440 | High |
| 145.34364 | 706.2327 | 257.76210 | 0.055350 | 1227.54595 | High |
| 13.02626 | 101.6097 | 42.18203 | 0.213800 | 99.05178 | Low |
| 40.94726 | 896.3296 | 160.60310 | 0.112125 | 763.93555 | Medium |
| 72.48252 | 1051.1068 | 352.52637 | 0.048650 | 2216.03515 | High |
# Calculate group means
group_means_2 <- three_groups_df_2 %>%
group_by(support_comp_group) %>%
summarize(across(c(discounts_to_tuition, gifts, graduation_rate, research_expenditure), mean, na.rm = TRUE))# Barplot for means
ggplot(group_means_2, aes(x = support_comp_group, y = discounts_to_tuition, fill = support_comp_group)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Mean Discounts to Tuition by Support Compensation Group", x = "Support Compensation Group", y = "Mean Discounts to Tuition Per Student (USD)") +
theme_minimal()# Barplot for means
ggplot(group_means_2, aes(x = support_comp_group, y = gifts, fill = support_comp_group)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Mean Gifts by Support Compensation Group", x = "Support Compensation Group", y = "Mean Gifts Per Student (USD)") +
theme_minimal()# Barplot for means
ggplot(group_means_2, aes(x = support_comp_group, y = research_expenditure, fill = support_comp_group)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Mean Research Expenditure by Support Compensationn Group", x = "Support Compensation Group", y = "Mean Research Expenditure Per Student (USD)") +
theme_minimal()# Barplot for means
ggplot(group_means_2, aes(x = support_comp_group, y = graduation_rate, fill = support_comp_group)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Mean Graduation Numbers by Support Compensation Group", x = "Support Compensation Group", y = "Mean Graduation Numbers") +
theme_minimal()# Compute correlations
correlation_matrix <- three_groups_df %>%
select(discounts_to_tuition, gifts, graduation_rate, research_expenditure) %>%
cor(use = "complete.obs")
# Heatmap
melted_corr <- melt(correlation_matrix)
ggplot(melted_corr, aes(Var1, Var2, fill = value)) +
geom_tile(color = "white") +
scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, limit = c(-1, 1), space = "Lab") +
theme_minimal() +
labs(title = "Correlation Heatmap", x = "", y = "")Variables with strong correlations (e.g., discounts_to_tuition and gifts) might be influencing each other and could provide insights when analyzed further. Weaker correlations (e.g., involving graduation_rate) suggest these variables may be independent or have limited direct relationships with others.
# Violin plot
ggplot(three_groups_df, aes(x = total_ops_group, y = discounts_to_tuition, fill = total_ops_group)) +
geom_violin(trim = FALSE) +
geom_boxplot(width = 0.2, fill = "white") +
labs(title = "Tuition Discounts by Athletic Spending Group", x = "Athletic Spending Group", y = "discounts to tuition") +
theme_minimal()ggplot(three_groups_df, aes(x = total_ops_group, y = gifts, fill = total_ops_group)) +
geom_violin(trim = FALSE) +
geom_boxplot(width = 0.2, fill = "white") +
labs(title = "Gifts by Athletic Spending Group", x = "Athletic Spending Group", y = "gifts") +
theme_minimal()ggplot(three_groups_df, aes(x = total_ops_group, y = research_expenditure, fill = total_ops_group)) +
geom_violin(trim = FALSE) +
geom_boxplot(width = 0.2, fill = "white") +
labs(title = "Research Expenditure by Athletic Spending Group", x = "Athletic Spending Group", y = "Research Expenditure") +
theme_minimal()ggplot(three_groups_df, aes(x = total_ops_group, y = graduation_rate, fill = total_ops_group)) +
geom_violin(trim = FALSE) +
geom_boxplot(width = 0.2, fill = "white") +
labs(title = "Graduation Rate by Athletic Spending Group", x = "Athletic Spending Group", y = "Graduation Rate") +
theme_minimal()head(two_groups_df, 5)| total_ops | discounts_to_tuition | gifts | graduation_rate | research_expenditure | total_ops_group |
|---|---|---|---|---|---|
| 235.10310 | 518.9550 | 270.18552 | 0.391975 | 2108.97807 | Low |
| 539.96740 | 822.0061 | 148.70020 | 0.129600 | 278.65440 | High |
| 791.66188 | 706.2327 | 257.76210 | 0.055350 | 1227.54595 | High |
| 85.87798 | 101.6097 | 42.18203 | 0.213800 | 99.05178 | Low |
| 231.02018 | 896.3296 | 160.60310 | 0.112125 | 763.93555 | Low |
# Calculate group means
group_means <- two_groups_df %>%
group_by(total_ops_group) %>%
summarize(across(c(discounts_to_tuition, gifts, graduation_rate, research_expenditure), mean, na.rm = TRUE))
# Barplot for means: Discounts to Tuition
ggplot(group_means, aes(x = total_ops_group, y = discounts_to_tuition, fill = total_ops_group)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Mean Discounts to Tuition by Athletic Spending Group (Two Groups)",
x = "Spending Group",
y = "Mean Discounts to Tuition") +
theme_minimal()# Barplot for means: Gifts
ggplot(group_means, aes(x = total_ops_group, y = gifts, fill = total_ops_group)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Mean Gifts by Athletic Spending Group (Two Groups)",
x = "Spending Group",
y = "Mean Gifts") +
theme_minimal()# Barplot for means: Research Expenditure
ggplot(group_means, aes(x = total_ops_group, y = research_expenditure, fill = total_ops_group)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Mean Research Expenditure by Athletic Spending Group (Two Groups)",
x = "Spending Group",
y = "Mean Research Expenditure") +
theme_minimal()# Barplot for means: Graduation Rate
ggplot(group_means, aes(x = total_ops_group, y = graduation_rate, fill = total_ops_group)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Mean Graduation Rate by Athletic Spending Group (Two Groups)",
x = "Spending Group",
y = "Mean Graduation Rate") +
theme_minimal()Which Schools has the most spending? Which schools has the least athletic spending?
# Select top 20 schools by total expenditure
top_schools <- means_df %>%
arrange(desc(total_ops)) %>%
slice_head(n = 20)
# Set the order of `school_name` based on the ranking of total expenditure
top_schools <- top_schools %>%
mutate(school_name = factor(school_name, levels = school_name[order(desc(total_ops))]))
# Ensure the school names are ranked by total_ops
top_schools <- top_schools %>%
mutate(school_name.x = factor(school_name, levels = school_name[order(desc(total_ops))]))
# Barplot for top 20 schools expenditures (normalized per student)
ggplot(top_schools, aes(x = reorder(school_name, total_ops), y = total_ops)) +
geom_bar(stat = "identity", fill = "steelblue", alpha = 0.8) +
coord_flip() +
labs(title = "Sports Expenditure Top 20 schools",
x = "School Name",
y = "Sports Expenditure Per Student (USD)",
caption = "Data: Means across 2018–2022 (Normalized Per Student)") +
theme_minimal()# Select bottom 20 schools by total expenditure
least_spendy_schools <- means_df %>%
arrange(total_ops) %>% # Arrange in ascending order
slice_head(n = 20) # Select the first 20 rows
# Ensure the school names are ranked by total expenditure (descending order)
least_spendy_schools <- least_spendy_schools %>%
mutate(school_name.x = factor(school_name, levels = school_name[order(desc(total_ops))]))
# Barplot for 20 least spendy schools expenditures
ggplot(least_spendy_schools, aes(x = reorder(school_name.x, -total_ops), y = total_ops)) +
geom_bar(stat = "identity", fill = "steelblue", alpha = 0.8) +
coord_flip() + # Flip the coordinates for a horizontal bar chart
labs(title = "Sports Expenditure 20 Least Spendy Schools",
x = "School Name",
y = "Sports Expenditure Per Student (USD)",
caption = "Data: Means across 2018–2022 (Normalized Per Student)") +
theme_minimal()ggplot(least_spendy_schools, aes(x = school_name, y = discounts_to_tuition)) +
geom_bar(stat = "identity", fill = "steelblue", alpha = 0.8) +
coord_flip() +
labs(
title = "Tuition Discounts for Bottom 20 Schools",
x = "School Name",
y = "Tuition Discounts",
caption = "Data: Means across 2018-2022"
) +
theme_minimal()ggplot(least_spendy_schools, aes(x = school_name, y = gifts)) +
geom_bar(stat = "identity", fill = "orange", alpha = 0.8) +
coord_flip() +
labs(title = "Gifts for Bottom 20 Schools",
x = "School Name",
y = "Gifts",
caption = "Data: Means across 2018-2022")ggplot(least_spendy_schools, aes(x = school_name, y = research_expenditure)) +
geom_bar(stat = "identity", fill = "darkgreen", alpha = 0.8) +
coord_flip() +
labs(title = "Research Expenditure for Bottom 20 Schools",
x = "School Name",
y = "Research Expenditure",
caption = "Data: Means across 2018-2022")ggplot(least_spendy_schools, aes(x = school_name, y = graduation_rate)) +
geom_bar(stat = "identity", fill = "purple", alpha = 0.8) +
coord_flip() +
labs(title = "Graduation Rate for Bottom 20 Schools",
x = "School Name",
y = "Graduation Rate",
caption = "Data: Means across 2018-2022")ggplot(top_schools, aes(x = school_name, y = discounts_to_tuition)) +
geom_bar(stat = "identity", fill = "steelblue", alpha = 0.8) +
coord_flip() +
labs(
title = "Tuition Discounts Top 20 Schools",
x = "School Name",
y = "Tuition Discounts Per Student (USD)",
caption = "Data: Means across 2018-2022"
) +
theme_minimal()Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_bar()`).
ggplot(top_schools, aes(x = school_name, y = gifts)) +
geom_bar(stat = "identity", fill = "orange", alpha = 0.8) +
coord_flip() +
labs(title = "Gifts for Top 20 Schools",
x = "School Name",
y = "Gifts Per Student (USD)",
caption = "Data: Means across 2018-2022")Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_bar()`).
ggplot(top_schools, aes(x = school_name, y = research_expenditure)) +
geom_bar(stat = "identity", fill = "darkgreen", alpha = 0.8) +
coord_flip() +
labs(title = "Research Expenditure for Top 20 Schools",
x = "School Name",
y = "Research Expenditure Per Student (USD)",
caption = "Data: Means across 2018-2022")Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_bar()`).
ggplot(top_schools, aes(x = school_name, y = graduation_rate)) +
geom_bar(stat = "identity", fill = "purple", alpha = 0.8) +
coord_flip() +
labs(title = "Graduation Numbers for Top 20 Schools",
x = "School Name",
y = "Normalized Number of Graduates",
caption = "Data: Means across 2018-2022")# Select relevant numeric columns
correlation_data <- means_df %>%
select(total_ops, gifts, research_expenditure, graduation_rate, discounts_to_tuition)
# Compute correlation matrix
correlation_matrix <- round(cor(correlation_data, use = "complete.obs"), 2)
# Convert to long format for heatmap
library(reshape2)
correlation_long <- melt(correlation_matrix)
# Heatmap with enhanced readability
ggplot(correlation_long, aes(x = Var1, y = Var2, fill = value)) +
geom_tile(color = "white") +
geom_text(aes(label = value), color = "black", size = 4) + # Add correlation values
scale_fill_gradient2(low = "red", high = "blue", mid = "white", midpoint = 0,
limits = c(-1, 1), name = "Correlation") +
labs(title = "Correlation Between Spending and Benefits",
x = "",
y = "",
fill = "Correlation") +
theme_minimal(base_size = 12) +
theme(axis.text.x = element_text(angle = 45, hjust = 1), # Rotate x-axis labels
axis.text.y = element_text(size = 10))Correlation between each dependent variable & all indp. variables
# Independent Variables vs discounts_to_tuition
selected_data <- means_df %>%
select(coach_comp_third, support_comp, coach_comp, guarantees, severance, recruiting,
team_travel, equip, game_expenses, marketing, camps, spirits, facilities, overhead,
indirect_institutional, medical, membership, meals, otherops, bowl_expenses, prof_def,
bowl_coachcomp, discounts_to_tuition)
# Compute correlation matrix
correlation_matrix <- cor(selected_data, use = "complete.obs")
# Subset correlations for discounts_to_tuition
correlation_subset <- correlation_matrix[1:22, "discounts_to_tuition", drop = FALSE]
# Convert to long format
correlation_long <- as.data.frame(correlation_subset) %>%
rownames_to_column(var = "Independent_Variable") %>%
rename(Correlation = discounts_to_tuition)
# Create heatmap
ggplot(correlation_long, aes(x = reorder(Independent_Variable, -Correlation),
y = "discounts_to_tuition", fill = Correlation)) +
geom_tile(color = "white") +
scale_fill_gradient2(low = "red", high = "blue", mid = "white", midpoint = 0) +
labs(title = "Correlation: Independent Variables vs Discounts to Tuition",
x = "Independent Variables",
y = "Discounts to Tuition (Dependent Variable)",
fill = "Correlation") +
theme_minimal(base_size = 12) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))# Independent Variables vs gifts
selected_data <- means_df %>%
select(coach_comp_third, support_comp, coach_comp, guarantees, aid, severance, recruiting,
team_travel, equip, game_expenses, marketing, camps, spirits, facilities, overhead,
indirect_institutional, medical, membership, meals, otherops, bowl_expenses, prof_def,
bowl_coachcomp, gifts)
# Compute correlation matrix
correlation_matrix <- cor(selected_data, use = "complete.obs")
# Subset correlations for gifts
correlation_subset <- correlation_matrix[1:23, "gifts", drop = FALSE]
# Convert to long format
correlation_long <- as.data.frame(correlation_subset) %>%
rownames_to_column(var = "Independent_Variable") %>%
rename(Correlation = gifts)
# Create heatmap
ggplot(correlation_long, aes(x = reorder(Independent_Variable, -Correlation),
y = "gifts", fill = Correlation)) +
geom_tile(color = "white") +
scale_fill_gradient2(low = "red", high = "blue", mid = "white", midpoint = 0) +
labs(title = "Correlation: Independent Variables vs Gifts",
x = "Independent Variables",
y = "Gifts (Dependent Variable)",
fill = "Correlation") +
theme_minimal(base_size = 12) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))# Independent Variables vs research_expenditure
selected_data <- means_df %>%
select(coach_comp_third, support_comp, coach_comp, guarantees, aid, severance, recruiting,
team_travel, equip, game_expenses, marketing, camps, spirits, facilities, overhead,
indirect_institutional, medical, membership, meals, otherops, bowl_expenses, prof_def,
bowl_coachcomp, research_expenditure)
# Compute correlation matrix
correlation_matrix <- cor(selected_data, use = "complete.obs")
# Subset correlations for research_expenditure
correlation_subset <- correlation_matrix[1:23, "research_expenditure", drop = FALSE]
# Convert to long format
correlation_long <- as.data.frame(correlation_subset) %>%
rownames_to_column(var = "Independent_Variable") %>%
rename(Correlation = research_expenditure)
# Create heatmap
ggplot(correlation_long, aes(x = reorder(Independent_Variable, -Correlation),
y = "research_expenditure", fill = Correlation)) +
geom_tile(color = "white") +
scale_fill_gradient2(low = "red", high = "blue", mid = "white", midpoint = 0) +
labs(title = "Correlation: Independent Variables vs Research Expenditure",
x = "Independent Variables",
y = "Research Expenditure (Dependent Variable)",
fill = "Correlation") +
theme_minimal(base_size = 12) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))# Independent Variables vs graduation_rate
selected_data <- means_df %>%
select(coach_comp_third, support_comp, coach_comp, guarantees, aid, severance, recruiting,
team_travel, equip, game_expenses, marketing, camps, spirits, facilities, overhead,
indirect_institutional, medical, membership, meals, otherops, bowl_expenses, prof_def,
bowl_coachcomp, graduation_rate)
# Compute correlation matrix
correlation_matrix <- cor(selected_data, use = "complete.obs")
# Subset correlations for graduation_rate
correlation_subset <- correlation_matrix[1:23, "graduation_rate", drop = FALSE]
# Convert to long format
correlation_long <- as.data.frame(correlation_subset) %>%
rownames_to_column(var = "Independent_Variable") %>%
rename(Correlation = graduation_rate)
# Create heatmap
ggplot(correlation_long, aes(x = reorder(Independent_Variable, -Correlation),
y = "graduation_rate", fill = Correlation)) +
geom_tile(color = "white") +
scale_fill_gradient2(low = "red", high = "blue", mid = "white", midpoint = 0) +
labs(title = "Correlation: Independent Variables vs Graduation Rate",
x = "Independent Variables",
y = "Graduation Rate (Dependent Variable)",
fill = "Correlation") +
theme_minimal(base_size = 12) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))Which specific indp. variable correlate to benefits?
# Predicting discounts_to_tuition
m_discounts_to_tuition <- lm(discounts_to_tuition ~ team_travel + support_comp + coach_comp + recruiting + equip + meals + facilities + overhead + guarantees, data = means_df)
summary(m_discounts_to_tuition)
Call:
lm(formula = discounts_to_tuition ~ team_travel + support_comp +
coach_comp + recruiting + equip + meals + facilities + overhead +
guarantees, data = means_df)
Residuals:
Min 1Q Median 3Q Max
-875.10 -146.16 -18.03 131.93 1092.32
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -12.697 59.372 -0.214 0.83112
team_travel 24.300 7.742 3.139 0.00227 **
support_comp 6.099 2.549 2.392 0.01873 *
coach_comp -2.917 3.270 -0.892 0.37462
recruiting 6.153 25.867 0.238 0.81250
equip 5.299 7.911 0.670 0.50462
meals -6.337 9.884 -0.641 0.52297
facilities 3.065 2.101 1.459 0.14787
overhead -2.398 2.082 -1.152 0.25233
guarantees -31.929 11.059 -2.887 0.00482 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 295.9 on 94 degrees of freedom
(1 observation deleted due to missingness)
Multiple R-squared: 0.6813, Adjusted R-squared: 0.6508
F-statistic: 22.33 on 9 and 94 DF, p-value: < 2.2e-16
# Predicting gifts
model_gifts <- lm(gifts ~ support_comp + meals + coach_comp + team_travel + overhead + equip + medical + aid + recruiting, data = means_df)
summary(model_gifts)
Call:
lm(formula = gifts ~ support_comp + meals + coach_comp + team_travel +
overhead + equip + medical + aid + recruiting, data = means_df)
Residuals:
Min 1Q Median 3Q Max
-479.34 -75.85 -18.33 43.21 825.39
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -27.68643 46.26097 -0.598 0.550957
support_comp 7.49274 1.91426 3.914 0.000172 ***
meals 16.78122 7.17172 2.340 0.021404 *
coach_comp 0.43783 2.32151 0.189 0.850814
team_travel 1.49863 6.20167 0.242 0.809578
overhead -1.00422 1.51638 -0.662 0.509431
equip -1.81596 5.79703 -0.313 0.754779
medical 6.98794 10.13380 0.690 0.492165
aid -0.07472 1.38160 -0.054 0.956982
recruiting -61.30822 18.54020 -3.307 0.001338 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 216.1 on 94 degrees of freedom
(1 observation deleted due to missingness)
Multiple R-squared: 0.6994, Adjusted R-squared: 0.6706
F-statistic: 24.3 on 9 and 94 DF, p-value: < 2.2e-16
summary(means_df$gifts, na.rm = TRUE) Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.00 46.59 124.84 264.98 286.23 1918.27 1
# Predicting research_expenditure
model_research_expenditure <- lm(research_expenditure ~ support_comp + coach_comp + team_travel + overhead + aid + meals, data = means_df)
summary(model_research_expenditure)
Call:
lm(formula = research_expenditure ~ support_comp + coach_comp +
team_travel + overhead + aid + meals, data = means_df)
Residuals:
Min 1Q Median 3Q Max
-2900.7 -533.2 -40.9 225.7 4007.1
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -305.904 192.880 -1.586 0.1160
support_comp 12.608 8.549 1.475 0.1435
coach_comp -4.037 8.958 -0.451 0.6532
team_travel -1.568 26.201 -0.060 0.9524
overhead 12.287 6.828 1.799 0.0751 .
aid 10.656 6.173 1.726 0.0875 .
meals 12.976 32.437 0.400 0.6900
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 982.8 on 97 degrees of freedom
(1 observation deleted due to missingness)
Multiple R-squared: 0.6512, Adjusted R-squared: 0.6296
F-statistic: 30.18 on 6 and 97 DF, p-value: < 2.2e-16
# Predicting graduation_rate
model_graduation_rate <- lm(graduation_rate ~ spirits + aid + medical + team_travel + recruiting, data = means_df)
summary(model_graduation_rate)
Call:
lm(formula = graduation_rate ~ spirits + aid + medical + team_travel +
recruiting, data = means_df)
Residuals:
Min 1Q Median 3Q Max
-0.15519 -0.03960 -0.01249 0.03184 0.17955
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 7.484e-02 1.146e-02 6.528 2.87e-09 ***
spirits 1.905e-02 5.829e-03 3.268 0.00149 **
aid 1.169e-03 3.876e-04 3.016 0.00325 **
medical 4.356e-03 2.882e-03 1.512 0.13383
team_travel -6.448e-05 1.545e-03 -0.042 0.96679
recruiting -1.092e-02 4.422e-03 -2.470 0.01522 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.06423 on 99 degrees of freedom
Multiple R-squared: 0.308, Adjusted R-squared: 0.273
F-statistic: 8.811 on 5 and 99 DF, p-value: 6.062e-07